name: Integration Tests

# Events that start this workflow.
on:
  # Manual runs from the Actions UI or the API.
  workflow_dispatch:
  pull_request:
    # Pull requests whose base branch is main or matches dev-** (e.g. dev-foo).
    branches:
      - main
      - 'dev-**'
  merge_group:
    branches:
      - main
      - 'dev-**'
    types:
      - checks_requested
  push:
    branches:
      - main

concurrency:
  # One concurrency group per git ref: a newer run on the same ref supersedes
  # an older one that is still in progress.
  group: ${{ github.ref }}
  # Post-submit runs happen on `main` (the only branch in the push trigger) and
  # must not cancel each other. The previous comparison against
  # `refs/heads/master` was always true, so runs on main were cancelled too.
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Give the workflow's GITHUB_TOKEN read-only access to every permission scope.
permissions: read-all

# Environment variables exported to every step of every job in this workflow.
# NOTE(review): presumably consumed by the Triton build and test suite; the
# exact semantics are defined outside this file.
env:
  TRITON_BUILD_WITH_CLANG_LLD: 'TRUE'
  TRITON_USE_ASSERT_ENABLED_LLVM: 'TRUE'
  # Overridden to 0 for the test_line_info.py runs in the test jobs.
  TRITON_DISABLE_LINE_INFO: '1'

jobs:
  # Decides whether the integration jobs should run for the triggering event
  # and, if so, publishes the runner matrices they consume as job outputs.
  Runner-Preparation:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    outputs:
      # Both outputs are empty strings when the "Prepare runner matrix" step is
      # skipped, i.e. whenever enable_integration was not set to 'true' below.
      matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
      matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
    steps:
      - name: Decide pre-submit integration test enablement
        # Always enable integration tests for pre-submit pull requests.
        if: github.event_name == 'pull_request'
        run: |
          echo "enable_integration=true" >> "$GITHUB_ENV"

      - name: Checkout post-submit commits
        if: github.event_name == 'push'
        uses: actions/checkout@v4
        with:
          # Only fetch two commits to check the latest changed files.
          fetch-depth: 2

      - name: Detect post-submit changed files
        id: detect-change
        if: github.event_name == 'push'
        # NOTE(review): third-party action referenced by a mutable tag;
        # consider pinning it to a vetted full commit SHA.
        uses: tj-actions/changed-files@v44
        with:
          # Monitor hash or version files in the cmake/ directory for changes.
          files: |
            cmake/*.txt

      - name: Decide post-submit integration test enablement
        # Only enable integration tests for post-submit when build dependency changes.
        if: github.event_name == 'push' && steps.detect-change.outputs.any_changed == 'true'
        run: |
          echo "enable_integration=true" >> "$GITHUB_ENV"

      - name: Prepare runner matrix
        id: set-matrix
        if: env.enable_integration == 'true'
        run: |
          # Step outputs are written to $GITHUB_OUTPUT; the `::set-output`
          # workflow command is deprecated.
          # The upstream repository uses self-hosted GPU runners; any other
          # repository (e.g. a fork) falls back to GitHub-hosted runners.
          if [ x"${{ github.repository }}" == x"openai/triton" ]; then
            echo 'matrix-CUDA=[["self-hosted", "A100"], ["self-hosted", "H100"]]' >> "$GITHUB_OUTPUT"
            echo 'matrix-HIP=[["self-hosted", "gfx90a"]]' >> "$GITHUB_OUTPUT"
          else
            echo 'matrix-CUDA=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
            echo 'matrix-HIP=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
          fi

  # Runs the repository's pre-commit hooks over all files on pull requests.
  pre-commit:
    name: pre-commit (code formatting)
    if: github.event_name == 'pull_request'
    needs: Runner-Preparation
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'pip'

      - name: Compute hash of pre-commit config
        id: cache-key
        run: |
          # sha256sum prints "<digest>  <file name>"; keep only the digest so
          # the cache key below contains no whitespace or file name.
          echo "pre_commit_hash=$(sha256sum .pre-commit-config.yaml | cut -d ' ' -f 1)" >> "$GITHUB_OUTPUT"
        shell: bash

      - name: Cache pre-commit's cache dir
        uses: actions/cache@v4
        with:
          # Note that we cannot use environment variables here given there is
          # no shell to interpret them in the paths.
          path: |
            ~/.cache/pre-commit
          # The cache is invalidated whenever .pre-commit-config.yaml changes.
          key: ${{ runner.os }}-${{ steps.cache-key.outputs.pre_commit_hash }}

      - name: Check pre-commit
        run: |
          python3 -m pip install --upgrade pre-commit
          # TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
          python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
          # If first run of yapf worked and made changes reset the tree to the original state
          git reset --hard
          # This second, full run is the one whose exit status decides the job.
          python3 -m pre_commit run --all-files --verbose

  # Builds Triton and runs the lit, Python and C++ test suites on every runner
  # listed in the matrix-CUDA output of Runner-Preparation.
  Integration-Tests:
    needs: Runner-Preparation
    # Runner-Preparation leaves matrix-CUDA empty when integration tests are not
    # enabled for this event. Skip the job in that case: fromJson('') below
    # would otherwise fail the whole workflow run.
    if: needs.Runner-Preparation.outputs.matrix-CUDA != ''

    runs-on: ${{ matrix.runner }}
    timeout-minutes: 30

    strategy:
      matrix:
        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-CUDA)}}

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: "true"

      - name: Compute build dependency cache keys
        id: cache-key
        run: |
          # The LLVM key uses the first 8 characters of the pinned hash; the
          # other two keys use the full contents of their version files.
          echo "llvm=$(cat cmake/llvm-hash.txt | cut -c 1-8)" >> $GITHUB_OUTPUT
          echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
          echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
        shell: bash

      - name: Cache build dependency
        uses: actions/cache@v4
        with:
          # Note that we cannot use environment variables here given there is
          # no shell to interpret them in the paths.
          path: |
            ~/.triton/llvm
            ~/.triton/nvidia
            ~/.triton/pybind11
          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}

      - name: Inspect cache directory
        run: |
          mkdir -p ~/.triton
          ls -alh ~/.triton

      - name: Update PATH
        run: |
          echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Install apt dependencies
        run: |
          sudo apt-get update -y
          sudo apt-get install -y ccache clang lld

      - name: Install pip dependencies
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel cmake==3.24 ninja pytest-xdist lit

      - name: Install Triton
        run: |
          echo "PATH is '$PATH'"
          cd python
          TRITON_BUILD_WITH_CCACHE=true python3 -m pip install --no-build-isolation -vvv '.[tests]'

      - name: Run lit tests
        run: |
          cd python
          # The build directory name is discovered by matching "cmake" under
          # python/build.
          LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
          if [ ! -d "${LIT_TEST_DIR}" ]; then
            echo "Could not find '${LIT_TEST_DIR}'" ; exit 1
          fi
          lit -v "${LIT_TEST_DIR}"

      - name: Run python tests on CUDA
        run: |
          cd python/test/unit
          # Bulk run in parallel; the ignored files and directories are run
          # separately below.
          python3 -m pytest -vvv -n 8 --ignore=hopper/test_flashattention.py --ignore=runtime --ignore=language/test_line_info.py --ignore=language/test_subprocess.py
          python3 -m pytest -vvv -n 8 language/test_subprocess.py
          # Run runtime tests serially to avoid race condition with cache handling
          python3 -m pytest -vvv runtime/
          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
          TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -vvv language/test_line_info.py
          # Run hopper/test_flashattention.py separately to avoid out of gpu memory
          python3 -m pytest -vs hopper/test_flashattention.py

      - name: Run interpreter tests
        # Only on the self-hosted H100 entry of the matrix.
        if: ${{matrix.runner[0] == 'self-hosted' && matrix.runner[1] == 'H100'}}
        env:
          TRITON_INTERPRET: "1"
        run: |
          cd python/test/unit
          python3 -m pytest -vvv -n 16 -m interpreter language/test_core.py language/test_standard.py \
           language/test_random.py language/test_block_pointer.py operators/test_flash_attention.py::test_op \
           --device cpu

      - name: Run C++ unittests
        run: |
          cd python
          cd "build/$(ls build | grep -i cmake)"
          ctest

      - name: Inspect cache directory
        run: |
          mkdir -p ~/.triton
          ls -alh ~/.triton
          du -sh ~/.triton/**


  # Builds Triton inside a ROCm container and runs the lit, Python and C++ test
  # suites on every runner listed in the matrix-HIP output of Runner-Preparation.
  Integration-Tests-AMD:
    needs: Runner-Preparation
    # Runner-Preparation leaves matrix-HIP empty when integration tests are not
    # enabled for this event. Skip the job in that case: fromJson('') below
    # would otherwise fail the whole workflow run.
    if: needs.Runner-Preparation.outputs.matrix-HIP != ''

    runs-on: ${{ matrix.runner }}
    timeout-minutes: 30

    strategy:
      matrix:
        runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}

    container:
      image: rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2
      # Pass the /dev/kfd and /dev/dri devices into the container and run as root.
      options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: 'true'

      - name: Compute build dependency cache keys
        id: cache-key
        run: |
          # The LLVM key uses the first 8 characters of the pinned hash; the
          # other two keys use the full contents of their version files.
          echo "llvm=$(cat cmake/llvm-hash.txt | cut -c 1-8)" >> $GITHUB_OUTPUT
          echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
          echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
        shell: bash

      - name: Clear triton cache directory
        run: |
          # Start from an empty ~/.triton before the cache is restored.
          rm -rf ~/.triton
          mkdir -p ~/.triton
          ls -alh ~/.triton

      - name: Cache build dependency
        uses: actions/cache@v4
        with:
          # Note that we cannot use environment variables here given there is
          # no shell to interpret them in the paths.
          path: |
            ~/.triton/llvm
            ~/.triton/nvidia
            ~/.triton/pybind11
          key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}

      - name: Inspect cache directory
        run: |
          mkdir -p ~/.triton
          ls -alh ~/.triton

      - name: Update PATH
        run: |
          echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH

      - name: Install pip dependencies
        run: |
          python3 -m pip install lit

      - name: Install Triton
        run: |
          echo "PATH is '$PATH'"
          # Remove any triton package already installed before installing the
          # checkout in editable mode.
          pip uninstall -y triton
          cd python
          pip install -v -e .

      - name: Run lit tests
        run: |
          cd python
          # The build directory name is discovered by matching "cmake" under
          # python/build.
          LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
          if [ ! -d "${LIT_TEST_DIR}" ]; then
            echo "Could not find '${LIT_TEST_DIR}'" ; exit 1
          fi
          lit -v "${LIT_TEST_DIR}"

      - name: Run python tests on HIP
        run: |
          cd python/test/unit
          pytest --capture=tee-sys -rfs -vvv -n 32 language operators \
                 --ignore=language/test_conversions.py \
                 --ignore=language/test_line_info.py \
                 --ignore=operators/test_blocksparse.py
          # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
          TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -vvv -n 8 language/test_line_info.py

          # Run runtime tests serially to avoid race condition with cache handling
          python3 -m pytest -vvv runtime
          # Run hopper tests
          python3 -m pytest -vs hopper/test_mixed_io.py \
                                hopper/test_tma_store_gemm.py

      - name: Run C++ unittests
        run: |
          cd python
          cd "build/$(ls build | grep -i cmake)"
          ctest

      - name: Inspect cache directory
        run: |
          mkdir -p ~/.triton
          ls -alh ~/.triton
          du -sh ~/.triton/**
